reqpkg <- c("ggplot2","plotly","DESeq2","magrittr","dplyr","genefilter","AnnotationHub","org.Mm.eg.db","GO.db","vsn","pheatmap","biomaRt","curl","RColorBrewer","viridis","fgsea","tidyverse","DT","ggpubr")
for (i in reqpkg) {
suppressWarnings(suppressMessages(library(i, quietly=T, verbose=T, warn.conflicts=F,character.only=T)))
print(i)
}
## [1] "ggplot2"
## [1] "plotly"
## [1] "DESeq2"
## [1] "magrittr"
## [1] "dplyr"
## [1] "genefilter"
## [1] "AnnotationHub"
## [1] "org.Mm.eg.db"
## [1] "GO.db"
## [1] "vsn"
## [1] "pheatmap"
## [1] "biomaRt"
## [1] "curl"
## [1] "RColorBrewer"
## [1] "viridis"
## [1] "fgsea"
## [1] "tidyverse"
## [1] "DT"
## [1] "ggpubr"
theme_set(theme_pubr())
select <- dplyr::select
ensembl <- useMart("ensembl")
ensemblMouse <- useDataset("mmusculus_gene_ensembl", mart=ensembl)
mouseProteinCodingGenes <- getBM(
attributes=c("ensembl_gene_id","external_gene_name","description"),
filters='biotype',
values=c('protein_coding'),
mart=ensemblMouse)
df <- read.csv("combatSeq_corrected_data.csv") %>%
.[rowSums(sapply(., '%in%', mouseProteinCodingGenes$external_gene_name)) > 0,] %>%
set_rownames(.$X) %>%
select(-X) %>%
select(c(-MF.13,-MF.17,-MF.23))
sex_list <- c(rep("female", 15), rep("male", 14))
cond_list <- c(rep("12wConv", 4), rep("4wConv", 4), rep("GF", 3), rep("SPF", 4), rep("12wConv", 3), rep("4wConv", 3), rep("GF", 4), rep("SPF", 4))
condition_list <- data.frame(row.names=colnames(df), Sex=sex_list, Condition=cond_list)
condition_list$Sex <- relevel(condition_list$Sex, "female")
condition_list$Condition <- relevel(condition_list$Condition, "SPF")
dds <- DESeq2::DESeqDataSetFromMatrix(countData=df,
colData = condition_list,
design = ~ Sex + Condition + Sex:Condition)
head(dds)
## class: DESeqDataSet
## dim: 6 29
## metadata(1): version
## assays(1): counts
## rownames(6): Rp1 Sox17 ... Gm37988 Tcea1
## rowData names(0):
## colnames(29): MF.20 MF.24 ... MF.11 MF.12
## colData names(2): Sex Condition
dds <- estimateSizeFactors(dds)
colData(dds)
## DataFrame with 29 rows and 3 columns
## Sex Condition sizeFactor
## <factor> <factor> <numeric>
## MF.20 female 12wConv 1.08104749069605
## MF.24 female 12wConv 1.1524481517802
## MF.1 female 12wConv 0.948474051294536
## MF.3 female 12wConv 0.93478546312708
## MF.19 female 4wConv 1.10897808920126
## ... ... ... ...
## MF.9 male GF 0.926910147270914
## MF.27 male SPF 1.10699861761705
## MF.28 male SPF 1.08840541444753
## MF.11 male SPF 0.971416269261023
## MF.12 male SPF 0.922861141150383
dds$Group <- factor(paste0(dds$Sex, dds$Condition), levels = c("femaleSPF", "female12wConv", "female4wConv", "femaleGF", "maleSPF", "male12wConv", "male4wConv", "maleGF"))
design(dds) <- ~ Group
vsd <- vst(dds, blind = TRUE)
head(assay(vsd), 3)
## MF.20 MF.24 MF.1 MF.3 MF.19 MF.21 MF.2 MF.4
## Rp1 6.483520 6.845312 6.658565 6.660960 6.833048 6.839088 6.963209 6.948694
## Sox17 8.684651 8.327021 8.215158 8.540489 8.040992 8.179754 8.384219 8.055689
## Mrpl15 9.766815 10.097341 9.929943 9.899744 9.852936 9.983695 9.854249 9.930592
## MF.29 MF.30 MF.14 MF.31 MF.32 MF.15 MF.16
## Rp1 6.328831 6.857582 6.704921 6.683618 6.601405 6.567786 6.494626
## Sox17 8.674499 8.485366 8.377268 8.052071 8.026195 8.124486 8.147617
## Mrpl15 10.071977 10.247321 9.989980 9.889346 10.043617 9.877673 9.893376
## MF.22 MF.6 MF.8 MF.18 MF.5 MF.7 MF.25 MF.26
## Rp1 6.760564 7.005746 6.697136 6.854662 6.975232 6.730715 6.671619 6.900810
## Sox17 8.114032 8.370795 8.289486 8.444027 7.990524 8.364131 8.331712 8.693452
## Mrpl15 9.725629 9.845922 9.816283 9.896309 9.827274 9.877667 9.860833 9.907690
## MF.10 MF.9 MF.27 MF.28 MF.11 MF.12
## Rp1 6.667650 6.799475 6.810229 6.959737 6.842398 6.702335
## Sox17 8.347725 8.371263 8.205323 8.027686 8.483256 8.521549
## Mrpl15 9.853993 9.911690 9.685097 10.097632 9.978403 9.933225
colData(vsd)
## DataFrame with 29 rows and 4 columns
## Sex Condition sizeFactor Group
## <factor> <factor> <numeric> <factor>
## MF.20 female 12wConv 1.08104749069605 female12wConv
## MF.24 female 12wConv 1.1524481517802 female12wConv
## MF.1 female 12wConv 0.948474051294536 female12wConv
## MF.3 female 12wConv 0.93478546312708 female12wConv
## MF.19 female 4wConv 1.10897808920126 female4wConv
## ... ... ... ... ...
## MF.9 male GF 0.926910147270914 maleGF
## MF.27 male SPF 1.10699861761705 maleSPF
## MF.28 male SPF 1.08840541444753 maleSPF
## MF.11 male SPF 0.971416269261023 maleSPF
## MF.12 male SPF 0.922861141150383 maleSPF
rld <- rlog(dds, blind=FALSE)
ntd <- normTransform(dds)
meanSdPlot(assay(ntd))
meanSdPlot(assay(vsd))
meanSdPlot(assay(rld))
vst(dds[,1:15]) %>% plotPCA(intgroup=c("Sex","Condition")) + ggtitle("vst, female only")
vst(dds[,16:29]) %>% plotPCA(intgroup=c("Sex","Condition")) + ggtitle("vst, male only")
plotPCA(vsd, intgroup=c("Sex","Condition")) + ggtitle("vst")
plotPCA(vsd, intgroup=c("Sex","Condition")) + geom_text(aes(label=name),vjust=1)
plotPCA(ntd, intgroup=c("Sex","Condition")) + geom_text(aes(label=name),vjust=1) + ggtitle("nld")
plotPCA(rld, intgroup=c("Sex","Condition")) + geom_text(aes(label=name),vjust=1) + ggtitle("rld")
dds <- DESeq(dds)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
#Compare SPF v GF in F
res_1 <- results(dds,contrast=c("Group", "femaleSPF", "femaleGF"), tidy = TRUE)
#Compare 4w vs 12w in F
res_2 <- results(dds, contrast=c("Group", "female4wConv", "female12wConv"), tidy = TRUE)
#Compare SPF vs 4w in F
res_3 <- results(dds, contrast=c("Group", "femaleSPF", "female4wConv"), tidy = TRUE)
#Compare SPF vs 12w in F
res_4 <- results(dds, contrast=c("Group", "femaleSPF", "female12wConv"), tidy = TRUE)
#Compare SPF v GF in M
res_5 <- results(dds,contrast=c("Group", "maleSPF", "maleGF"), tidy = TRUE)
#Compare 4w vs 12w in M
res_6 <- results(dds, contrast=c("Group", "male4wConv", "male12wConv"), tidy = TRUE)
#Compare SPF vs 4w in M
res_7 <- results(dds, contrast=c("Group", "maleSPF", "male4wConv"), tidy = TRUE)
#Compare SPF vs 12w in M
res_8 <- results(dds, contrast=c("Group", "maleSPF", "male12wConv"), tidy = TRUE)
sum(res_1$pvalue < 0.05, na.rm=TRUE)
## [1] 3738
sum(!is.na(res_1$pvalue))
## [1] 18757
sum(res_1$padj < 0.1, na.rm=TRUE)
## [1] 2321
sum(res_2$pvalue < 0.05, na.rm=TRUE)
## [1] 575
sum(!is.na(res_2$pvalue))
## [1] 18757
sum(res_2$padj < 0.1, na.rm=TRUE)
## [1] 4
sum(res_3$pvalue < 0.05, na.rm=TRUE)
## [1] 3555
sum(!is.na(res_3$pvalue))
## [1] 18757
sum(res_3$padj < 0.1, na.rm=TRUE)
## [1] 1964
sum(res_4$pvalue < 0.05, na.rm=TRUE)
## [1] 3261
sum(!is.na(res_4$pvalue))
## [1] 18757
sum(res_4$padj < 0.1, na.rm=TRUE)
## [1] 1739
sum(res_5$pvalue < 0.05, na.rm=TRUE)
## [1] 3012
sum(!is.na(res_5$pvalue))
## [1] 18757
sum(res_5$padj < 0.1, na.rm=TRUE)
## [1] 1280
sum(res_6$pvalue < 0.05, na.rm=TRUE)
## [1] 642
sum(!is.na(res_6$pvalue))
## [1] 18757
sum(res_6$padj < 0.1, na.rm=TRUE)
## [1] 0
sum(res_7$pvalue < 0.05, na.rm=TRUE)
## [1] 1833
sum(!is.na(res_7$pvalue))
## [1] 18757
sum(res_7$padj < 0.1, na.rm=TRUE)
## [1] 358
sum(res_8$pvalue < 0.05, na.rm=TRUE)
## [1] 1944
sum(!is.na(res_8$pvalue))
## [1] 18757
sum(res_8$padj < 0.1, na.rm=TRUE)
## [1] 357
resultsNames(dds)
## [1] "Intercept" "Group_female12wConv_vs_femaleSPF"
## [3] "Group_female4wConv_vs_femaleSPF" "Group_femaleGF_vs_femaleSPF"
## [5] "Group_maleSPF_vs_femaleSPF" "Group_male12wConv_vs_femaleSPF"
## [7] "Group_male4wConv_vs_femaleSPF" "Group_maleGF_vs_femaleSPF"
condition_LFC <- lfcShrink(dds, coef = "Group_female12wConv_vs_femaleSPF", type = 'apeglm')
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
DESeq2::plotMA(condition_LFC, ylim=c(-2,2))
topVarGenes <- head(order(rowVars(assay(vsd)), decreasing = TRUE), 50)
mat <- assay(vsd)[ topVarGenes, ]
mat <- mat - rowMeans(mat)
anno <- as.data.frame(colData(vsd)[, c("Sex","Condition")])
pheatmap(mat, color = inferno(length(mat) - 1), annotation_col = anno, cluster_cols = F)
pheatmap(mat, annotation_col = anno)
pheatmap(mat, color = inferno(length(mat) - 1), annotation_col = anno)
pathway_folder <- 'pathways/'
fgsea_output <- list()
ranks <- list()
mart <- useDataset("mmusculus_gene_ensembl", mart=useMart("ensembl"))
bm <- getBM(attributes=c("external_gene_name", "hsapiens_homolog_associated_gene_name"), mart=mart) %>%
distinct() %>%
as_tibble() %>%
na_if("") %>%
na.omit()
fGSEA_pipe <- function(res, path, title, pres=FALSE) {
res2 <- inner_join(res_1, bm, by=c("row"="external_gene_name")) %>%
select(hsapiens_homolog_associated_gene_name, stat) %>%
na.omit() %>%
distinct() %>%
group_by(hsapiens_homolog_associated_gene_name) %>%
summarize(stat=mean(stat))
ranks <- deframe(res2)
if(pres) head(ranks, 20)
if(path == "hallmark") p <- "h.all.v7.0.symbols.gmt"
else if(path == "KEGG") p <- "c2.cp.kegg.v7.0.symbols.gmt"
else if(path == "mir") p <- "c3.mir.v7.0.symbols.gmt"
else if(path == "GO_all") p <- "c5.all.v7.0.symbols.gmt"
else if(path == "GO_BP") p <- "c5.bp.v7.0.symbols.gmt"
fgseaResTidy <- fgsea(pathways=gmtPathways(paste0(pathway_folder, p)), stats=ranks, nperm=100000) %>%
as_tibble() %>%
filter(padj < 0.05) %>%
arrange(desc(NES))
p <- ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) +
geom_col() +
coord_flip() +
labs(x="Pathway", y="Normalized Enrichment Score",
title=paste0(title, ": ", path, " pathways NES from GSEA")) +
theme_minimal()
return(p)
}
fGSEA_pipe(res_1, "hallmark", "Female SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_1, "KEGG", "Female SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_1, "GO_BP", "Female SPF vs GF") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_2, "hallmark", "Female 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_2, "KEGG", "Female 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_2, "GO_BP", "Female 4w vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_3, "hallmark", "Female SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_3, "KEGG", "Female SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_3, "GO_BP", "Female SPF vs 4w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_4, "hallmark", "Female SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_4, "KEGG", "Female SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_4, "GO_BP", "Female SPF vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_5, "hallmark", "Male SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_5, "KEGG", "Male SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_5, "GO_BP", "Male SPF vs GF") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_6, "hallmark", "Male 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_6, "KEGG", "Male 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_6, "GO_BP", "Male 4w vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_7, "hallmark", "Male SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_7, "KEGG", "Male SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_7, "GO_BP", "Male SPF vs 4w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_8, "hallmark", "Male SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_8, "KEGG", "Male SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_8, "GO_BP", "Male SPF vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.16% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
smanzoor@uchicago.edu